In [1]:
from ydata_profiling import ProfileReport
import pandas as pd
In [2]:
# Load the tab-separated Chicago crash export into a DataFrame.
df = pd.read_csv("data/Chicago_data.tsv", sep="\t")
# End the cell on the frame itself instead of print(df): the notebook then
# renders pandas' truncated HTML table (first/last rows plus the row x column
# count) rather than a plain-text dump wrapped across column groups.
df
CRASH_RECORD_ID CRASH_DATE_EST_I \
0 6c1659069e9c6285a650e70d6f9b574ed5f64c12888479... NaN
1 5f54a59fcb087b12ae5b1acff96a3caf4f2d37e79f8db4... NaN
2 61fcb8c1eb522a6469b460e2134df3d15f82e81fd93e9c... NaN
3 004cd14d0303a9163aad69a2d7f341b7da2a8572b2ab33... NaN
4 a1d5f0ea90897745365a4cbb06cc60329a120d89753fac... NaN
... ... ...
817718 6dee8823d4ae96624b741428681d19f50b5960418b6d79... NaN
817719 61c8dcd63fae60613bc9ec526fa901420cbe99a6d35840... NaN
817720 54d55bfcc6627f587abbe0d14c42e51b812f930566fb06... NaN
817721 6b6f5ceb4053bfbb3483fb453231caa94ff2351bde4c9d... NaN
817722 2ee6209bde600a6ae2f12fb385b1e5749803cc01d0e954... NaN
CRASH_DATE POSTED_SPEED_LIMIT TRAFFIC_CONTROL_DEVICE \
0 08/18/2023 12:50:00 PM 15 OTHER
1 07/29/2023 02:45:00 PM 30 TRAFFIC SIGNAL
2 08/18/2023 05:58:00 PM 30 NO CONTROLS
3 11/26/2019 08:38:00 AM 25 NO CONTROLS
4 08/18/2023 10:45:00 AM 20 NO CONTROLS
... ... ... ...
817718 09/02/2023 06:25:00 PM 30 TRAFFIC SIGNAL
817719 07/10/2023 12:29:00 PM 30 TRAFFIC SIGNAL
817720 12/28/2019 01:16:00 AM 35 UNKNOWN
817721 10/23/2019 01:32:00 PM 30 TRAFFIC SIGNAL
817722 05/29/2023 09:40:00 PM 30 UNKNOWN
DEVICE_CONDITION WEATHER_CONDITION LIGHTING_CONDITION \
0 FUNCTIONING PROPERLY CLEAR DAYLIGHT
1 FUNCTIONING PROPERLY CLEAR DAYLIGHT
2 NO CONTROLS CLEAR DAYLIGHT
3 NO CONTROLS CLEAR DAYLIGHT
4 NO CONTROLS CLEAR DAYLIGHT
... ... ... ...
817718 FUNCTIONING PROPERLY CLEAR DAYLIGHT
817719 FUNCTIONING PROPERLY CLEAR DAYLIGHT
817720 UNKNOWN CLEAR DARKNESS, LIGHTED ROAD
817721 FUNCTIONING PROPERLY CLEAR DAYLIGHT
817722 UNKNOWN CLEAR DARKNESS
FIRST_CRASH_TYPE TRAFFICWAY_TYPE ... \
0 REAR END OTHER ...
1 PARKED MOTOR VEHICLE DIVIDED - W/MEDIAN (NOT RAISED) ...
2 PEDALCYCLIST NOT DIVIDED ...
3 PEDESTRIAN ONE-WAY ...
4 FIXED OBJECT OTHER ...
... ... ... ...
817718 REAR END NOT DIVIDED ...
817719 TURNING FOUR WAY ...
817720 PARKED MOTOR VEHICLE ONE-WAY ...
817721 PEDESTRIAN DIVIDED - W/MEDIAN (NOT RAISED) ...
817722 TURNING OTHER ...
INJURIES_NON_INCAPACITATING INJURIES_REPORTED_NOT_EVIDENT \
0 1.0 0.0
1 0.0 0.0
2 1.0 0.0
3 0.0 0.0
4 0.0 0.0
... ... ...
817718 0.0 0.0
817719 0.0 0.0
817720 0.0 0.0
817721 0.0 0.0
817722 0.0 0.0
INJURIES_NO_INDICATION INJURIES_UNKNOWN CRASH_HOUR CRASH_DAY_OF_WEEK \
0 1.0 0.0 12 6
1 1.0 0.0 14 7
2 1.0 0.0 17 6
3 1.0 0.0 8 3
4 1.0 0.0 10 6
... ... ... ... ...
817718 2.0 0.0 18 7
817719 2.0 0.0 12 2
817720 1.0 0.0 1 7
817721 0.0 0.0 13 4
817722 2.0 0.0 21 2
CRASH_MONTH LATITUDE LONGITUDE \
0 8 NaN NaN
1 7 41.854120 -87.665902
2 8 41.942976 -87.761883
3 11 NaN NaN
4 8 NaN NaN
... ... ... ...
817718 9 41.758092 -87.624902
817719 7 41.857531 -87.644929
817720 12 41.685142 -87.628557
817721 10 41.751046 -87.625378
817722 5 41.830922 -87.631651
LOCATION
0 NaN
1 POINT (-87.665902342962 41.854120262952)
2 POINT (-87.761883496974 41.942975745006)
3 NaN
4 NaN
... ...
817718 POINT (-87.624902228247 41.758092176383)
817719 POINT (-87.644928607359 41.857530859236)
817720 POINT (-87.628556919131 41.685141540233)
817721 POINT (-87.625377942917 41.751045778094)
817722 POINT (-87.631650518377 41.830922441769)
[817723 rows x 48 columns]
In [3]:
# Build the profiling report object for the loaded frame, titled
# "Chicago Data Report".
# NOTE(review): the recorded output reports 817723 rows x 48 columns, so
# profiling may be slow on the full frame; confirm whether a reduced
# configuration or a sample would be acceptable here.
profile = ProfileReport(
    df,
    title="Chicago Data Report",
)
In [4]:
# Display the report inline in the notebook. The recorded output for this cell
# shows the "Summarize dataset", "Generate report structure" and "Render HTML"
# progress bars, so the report is summarised and rendered when this call runs.
profile.to_notebook_iframe()
Summarize dataset: 0%| | 0/5 [00:00<?, ?it/s]
Generate report structure: 0%| | 0/1 [00:00<?, ?it/s]
Render HTML: 0%| | 0/1 [00:00<?, ?it/s]